{# Load the training CSV into `df`, turn infinities into NaN and drop rows that
   have no label. `dataset_has_header` is evaluated at render time; the names
   train_file_path, dataset_default_headers, label_col, pd and np are used by
   the generated code and are not defined in this snippet. #}{% import 'macro.jinja2' as MRO with context %}{% if dataset_has_header == True %}{{ MRO.insert_tab()}}df = pd.read_csv(train_file_path)
{% else %}{{ MRO.insert_tab()}}df = pd.read_csv(train_file_path, header=None)
{{ MRO.insert_tab()}}df.columns = dataset_default_headers  # file was read with header=None, so assign column names explicitly
{% endif %}
{{ MRO.insert_tab()}}# Treat +/-infinity as missing values. np.inf is used because the
{{ MRO.insert_tab()}}# np.Infinity alias was removed in NumPy 2.0.
{{ MRO.insert_tab()}}df.replace([np.inf, -np.inf], np.nan, inplace=True)
{{ MRO.insert_tab()}}# Drop rows whose label is missing and report how many were removed.
{{ MRO.insert_tab()}}n_rows = df.shape[0]
{{ MRO.insert_tab()}}df.dropna(axis=0, how='all', subset=[label_col], inplace=True)
{{ MRO.insert_tab()}}print(f"Total rows which label is None: {n_rows - df.shape[0]}")

{# Build X_train / X_test from `df`: by an explicit partition column when
   partition_strategy is 'manual' (which also builds X_eval), otherwise by an
   unshuffled hold-out split. #}{% if partition_strategy == "manual" %}
{{ MRO.insert_tab()}}# Manual strategy: the value in partition_col assigns each row to a split.
{{ MRO.insert_tab()}}partition_labels = df[partition_col]
{{ MRO.insert_tab()}}X_train = df[partition_labels == PartitionClass.Train]
{{ MRO.insert_tab()}}X_eval = df[partition_labels == PartitionClass.Eval]
{{ MRO.insert_tab()}}X_test = df[partition_labels == PartitionClass.Test]
{{ MRO.insert_tab()}}# Every split must contain at least one row.
{{ MRO.insert_tab()}}if len(X_train) == 0:
{{ MRO.insert_tab()}}    raise Exception("No train rows in dataset ")
{{ MRO.insert_tab()}}if len(X_eval) == 0:
{{ MRO.insert_tab()}}    raise Exception("No evaluate rows in dataset ")
{{ MRO.insert_tab()}}if len(X_test) == 0:
{{ MRO.insert_tab()}}    raise Exception("No test rows in dataset ")
{% else %}
{% if datetime_series_col %}
{{ MRO.insert_tab()}}# Order rows by the datetime column before the unshuffled split below.
{{ MRO.insert_tab()}}df = df.sort_values(by='{{ datetime_series_col }}', axis=0)
{% endif %}
{{ MRO.insert_tab()}}# Hold-out split without shuffling (shuffle=False); test_size is rendered
{{ MRO.insert_tab()}}# from holdout_percentage as a fraction.
{{ MRO.insert_tab()}}X_train, X_test = train_test_split(
{{ MRO.insert_tab()}}    df,
{{ MRO.insert_tab()}}    test_size={{ holdout_percentage / 100 }},
{{ MRO.insert_tab()}}    shuffle=False,
{{ MRO.insert_tab()}}    random_state=random_state,
{{ MRO.insert_tab()}})
{% endif %}
{{ MRO.insert_tab()}}# Detach the label column from each frame; pop() removes it from X_* in place.
{{ MRO.insert_tab()}}y_train, y_test = X_train.pop(label_col), X_test.pop(label_col)
{{ MRO.insert_tab()}}# Pass both feature frames through dataset_util.cast_df with model_feature_list.
{{ MRO.insert_tab()}}X_train = dataset_util.cast_df(
{{ MRO.insert_tab()}}    X_train, model_feature_list, remove_unnecessary_cols=True
{{ MRO.insert_tab()}})
{{ MRO.insert_tab()}}X_test = dataset_util.cast_df(
{{ MRO.insert_tab()}}    X_test, model_feature_list, remove_unnecessary_cols=True
{{ MRO.insert_tab()}})
{{ MRO.insert_tab()}}# Distinct label values seen in either split.
{{ MRO.insert_tab()}}classes = list(set(y_train) | set(y_test))